/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package mj.ocraptor.extraction.tika.parser.xml;

import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.xml.sax.Attributes;

/**
 * SAX event handler that maps the contents of an XML element into a metadata
 * field.
 * <p>
 * The value is taken either from the character data found inside the
 * configured element, or, when the configured element wraps an RDF
 * {@code Bag}, from each {@code li} entry of that bag (one metadata value per
 * entry).
 * <p>
 * Instances keep parsing state in plain fields and perform no
 * synchronization.
 *
 * @since Apache Tika 0.10
 */
public class ElementMetadataHandler extends AbstractMetadataHandler {
	/**
	 * Logger for this class
	 */
	private static final Log logger = LogFactory
			.getLog(ElementMetadataHandler.class);

	private static final String LOCAL_NAME_RDF_BAG = "Bag";
	private static final String LOCAL_NAME_RDF_LI = "li";
	private static final String URI_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

	/** Namespace URI of the element this handler listens for. */
	private final String uri;

	/** Local name of the element this handler listens for. */
	private final String localName;

	/** The metadata object that receives the extracted values. */
	private final Metadata metadata;

	/** Name of the metadata field that is populated. */
	private final String name;

	/**
	 * Property key of the populated field; {@code null} when the handler was
	 * created with a plain string key.
	 */
	private final Property targetProperty;

	private final boolean allowDuplicateValues;
	private final boolean allowEmptyValues;

	/**
	 * The buffer used to capture characters when inside a bag li element.
	 */
	private final StringBuilder bufferBagged = new StringBuilder();

	/**
	 * The buffer used to capture characters inside standard elements.
	 */
	private final StringBuilder bufferBagless = new StringBuilder();

	/**
	 * Whether or not the value was found in a standard element structure or
	 * inside a bag. Reset to {@code true} each time the outermost matching
	 * element is closed.
	 */
	private boolean isBagless = true;

	/**
	 * Current nesting depth of matching elements (the configured element plus
	 * any RDF Bag / li elements nested inside it).
	 */
	private int matchLevel = 0;

	/** Current nesting depth of the configured element itself. */
	private int parentMatchLevel = 0;

	/**
	 * Constructor for string metadata keys.
	 *
	 * @param uri
	 *            the uri of the namespace of the element
	 * @param localName
	 *            the local name of the element
	 * @param metadata
	 *            the Tika metadata object to populate
	 * @param name
	 *            the Tika metadata field key
	 */
	public ElementMetadataHandler(String uri, String localName,
			Metadata metadata, String name) {
		this(uri, localName, metadata, name, false, false);
	}

	/**
	 * Constructor for string metadata keys which allows change of behavior for
	 * duplicate and empty entry values.
	 *
	 * @param uri
	 *            the uri of the namespace of the element
	 * @param localName
	 *            the local name of the element
	 * @param metadata
	 *            the Tika metadata object to populate
	 * @param name
	 *            the Tika metadata field key
	 * @param allowDuplicateValues
	 *            add duplicate values to the Tika metadata
	 * @param allowEmptyValues
	 *            add empty values to the Tika metadata
	 */
	public ElementMetadataHandler(String uri, String localName,
			Metadata metadata, String name, boolean allowDuplicateValues,
			boolean allowEmptyValues) {
		super(metadata, name);
		this.uri = uri;
		this.localName = localName;
		this.metadata = metadata;
		this.name = name;
		this.targetProperty = null;
		this.allowDuplicateValues = allowDuplicateValues;
		this.allowEmptyValues = allowEmptyValues;
		if (logger.isTraceEnabled()) {
			logger.trace("created simple handler for " + this.name);
		}
	}

	/**
	 * Constructor for Property metadata keys.
	 *
	 * @param uri
	 *            the uri of the namespace of the element
	 * @param localName
	 *            the local name of the element
	 * @param metadata
	 *            the Tika metadata object to populate
	 * @param targetProperty
	 *            the Tika metadata Property key
	 */
	public ElementMetadataHandler(String uri, String localName,
			Metadata metadata, Property targetProperty) {
		this(uri, localName, metadata, targetProperty, false, false);
	}

	/**
	 * Constructor for Property metadata keys which allows change of behavior
	 * for duplicate and empty entry values.
	 *
	 * @param uri
	 *            the uri of the namespace of the element
	 * @param localName
	 *            the local name of the element
	 * @param metadata
	 *            the Tika metadata object to populate
	 * @param targetProperty
	 *            the Tika metadata Property key
	 * @param allowDuplicateValues
	 *            add duplicate values to the Tika metadata
	 * @param allowEmptyValues
	 *            add empty values to the Tika metadata
	 */
	public ElementMetadataHandler(String uri, String localName,
			Metadata metadata, Property targetProperty,
			boolean allowDuplicateValues, boolean allowEmptyValues) {
		super(metadata, targetProperty);
		this.uri = uri;
		this.localName = localName;
		this.metadata = metadata;
		this.targetProperty = targetProperty;
		this.name = targetProperty.getName();
		this.allowDuplicateValues = allowDuplicateValues;
		this.allowEmptyValues = allowEmptyValues;
		if (logger.isTraceEnabled()) {
			logger.trace("created property handler for " + this.name);
		}
	}

	/**
	 * Tells whether the given element is the configured (parent) element.
	 *
	 * @param uri
	 *            namespace URI reported by the parser
	 * @param localName
	 *            local name reported by the parser
	 * @return {@code true} if both namespace and local name equal the ones
	 *         this handler was configured with
	 */
	protected boolean isMatchingParentElement(String uri, String localName) {
		return (uri.equals(this.uri) && localName.equals(this.localName));
	}

	/**
	 * Tells whether the given element takes part in value extraction: either
	 * the configured element itself, or an RDF {@code Bag} / {@code li}
	 * element encountered while inside the configured element.
	 *
	 * @param uri
	 *            namespace URI reported by the parser
	 * @param localName
	 *            local name reported by the parser
	 * @return {@code true} if the element should be counted in the match depth
	 */
	protected boolean isMatchingElement(String uri, String localName) {
		if (uri.equals(this.uri) && localName.equals(this.localName)) {
			return true;
		}
		// RDF containers only count while we are inside the parent element
		return parentMatchLevel > 0
				&& URI_RDF.equals(uri)
				&& (LOCAL_NAME_RDF_BAG.equals(localName)
						|| LOCAL_NAME_RDF_LI.equals(localName));
	}

	/**
	 * Tracks the nesting depth of matching elements. The match depth is
	 * evaluated before the parent depth is raised, so RDF elements are only
	 * counted once the parent element has already been entered.
	 */
	@Override
	public void startElement(String uri, String localName, String name,
			Attributes attributes) {
		if (isMatchingElement(uri, localName)) {
			matchLevel++;
		}
		if (isMatchingParentElement(uri, localName)) {
			parentMatchLevel++;
		}
	}

	/**
	 * Emits the collected value(s) when a matching element is closed.
	 * <p>
	 * Dropping back to depth 2 means an entry nested two levels below the
	 * parent (parent &gt; Bag &gt; li) was closed, so the bagged buffer is
	 * flushed as one value. Dropping back to depth 0 means the outermost
	 * parent element was closed; the bagless buffer is flushed only if no bag
	 * entry was seen inside it. In either case the per-occurrence state is
	 * reset at depth 0 so that the next occurrence of the element starts
	 * clean.
	 */
	@Override
	public void endElement(String uri, String localName, String name) {
		if (isMatchingParentElement(uri, localName)) {
			parentMatchLevel--;
		}
		if (!isMatchingElement(uri, localName)) {
			return;
		}
		matchLevel--;
		if (matchLevel == 2) {
			// we're inside a bag li element, add the bagged buffer
			addMetadata(bufferBagged.toString().trim());
			bufferBagged.setLength(0);
			isBagless = false;
		}
		if (matchLevel == 0) {
			if (isBagless) {
				String valueBagless = bufferBagless.toString();
				// NOTE(review): values whose text contains "Bag" are skipped;
				// the reason is not visible in this file - confirm whether
				// this filter is still wanted.
				if (valueBagless.length() > 0
						&& !valueBagless.contains(LOCAL_NAME_RDF_BAG)) {
					// we're in a standard element, add the bagless buffer
					addMetadata(valueBagless.trim());
				}
			}
			// Always reset the per-occurrence state. Previously the buffer
			// and the flag were only reset on the "value added" path, so a
			// bag (or a skipped value) left stale text behind and caused all
			// later plain occurrences of the element to be ignored.
			bufferBagless.setLength(0);
			isBagless = true;
		}
	}

	/**
	 * Collects character data while inside the configured element.
	 */
	@Override
	public void characters(char[] ch, int start, int length) {
		// We need to append to both buffers since we don't know whether we're
		// inside a bag until we're done
		if (parentMatchLevel <= 0) {
			return;
		}
		if (matchLevel > 2) {
			bufferBagged.append(ch, start, length);
		}
		if (matchLevel > 0) {
			bufferBagless.append(ch, start, length);
		}
	}

	/**
	 * Treats ignorable whitespace exactly like regular character data.
	 */
	@Override
	public void ignorableWhitespace(char[] ch, int start, int length) {
		characters(ch, start, length);
	}

	/**
	 * Adds a value to the metadata field.
	 * <p>
	 * For multi-valued Property keys the value is added directly, honouring
	 * the {@code allowEmptyValues} and {@code allowDuplicateValues} settings
	 * (a {@code null} value is stored as the empty string when empty values
	 * are allowed). In every other case the call is delegated to the
	 * superclass.
	 *
	 * @param value
	 *            the value to add, may be {@code null} or empty
	 */
	@Override
	protected void addMetadata(String value) {
		if (logger.isTraceEnabled()) {
			logger.trace("adding " + name + "=" + value);
		}
		if (targetProperty == null || !targetProperty.isMultiValuePermitted()) {
			super.addMetadata(value);
			return;
		}
		boolean empty = (value == null || value.length() == 0);
		if (empty && !allowEmptyValues) {
			return;
		}
		String normalized = empty ? "" : value;
		if (allowDuplicateValues || !isAlreadyPresent(normalized)) {
			metadata.add(targetProperty, normalized);
		}
	}

	/**
	 * Checks whether the metadata field already holds the given value.
	 */
	private boolean isAlreadyPresent(String value) {
		String[] previous = metadata.getValues(name);
		return previous != null && Arrays.asList(previous).contains(value);
	}
}
